---
title: "tract_figures"
author: "Matthew Gordon"
date: "9/27/2020"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
rootdir <- dirname(getwd())
```

## Maps and figures for ACS data by census tract
Load packages, data, and shapefiles

```{r, message=FALSE, echo=FALSE, warning=FALSE}
library(tidyverse)
library(sf)
library(lwgeom)
library(gridExtra)
library(scales)
library(haven)
library(data.table)

shp.tract <- st_read(paste(rootdir, "/Data/Census Tracts shapefiles/", sep = ""), 
                     layer = "geo_export_79d01148-4a56-4331-a800-1e4f76bed5aa")

df.master <- read_csv(paste(rootdir, "/Data/master.csv", sep = ""), na = c("", "NA", ".")) %>%
        mutate(caserate = cumulative_cases/tot_population,
               hosprate = cumulative_hospitalized/tot_population,
               deathrate = cumulative_total_deaths/tot_population)

df.boros <- data.frame(boro_name = c("Manhattan", "Brooklyn", "Staten Island", "Queens", "Bronx"),
                       county = c("36061", "36047", "36085", "36081","36005"))

shp.tract <- merge(shp.tract, df.boros, by = "boro_name")
shp.tract <- mutate(shp.tract, tract = paste(county, ct2010, sep = ""))
shp.tract <- merge(shp.tract, df.master, by = c("tract", "puma", "boro_ct201", "boro_code", "ntaname"))

shp.tract <- mutate(shp.tract, sg_adj_pop = tot_population*(1+device_chg1020),
                    adj_death_rate = cumulative_total_deaths/sg_adj_pop,
                    adj_hosp_rate = cumulative_hospitalized/sg_adj_pop)

load(paste(rootdir, "/Data/Traffic and Roadways/nycrds.RData", sep = ""))

centroids <- st_centroid(shp.tract)

below110 <- st_union(filter(shp.tract, below_110th==1))

hwy_fes <- shp.tract %>%
        group_by(closesthwy) %>%
        summarize(geometry = st_union(geometry))

puma_fes <- shp.tract %>%
        group_by(puma) %>%
        summarize(geometry = st_union(geometry))

```



```{r, out.width = "100%"}
ggpm <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2)) +
        geom_sf(aes(color = pm_idw, fill = pm_idw)) +
        scale_fill_viridis_c() + scale_color_viridis_c()  +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title =  "PM 2.5",  fill = "PM 2.5", color = "PM 2.5",
             caption = "Average concentrations 2010-2018 estimated using \n inverse distance weighting - NYCCAS", tag = "A") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

ggpm
```

```{r, out.width = "100%"}
ggno2 <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2)) +
        geom_sf(aes(color = no2_idw, fill = no2_idw)) +
        scale_fill_viridis_c() + scale_color_viridis_c()  +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title =  "NO2",  fill = "NO2", color = "NO2",
             caption = "Average concentrations 2010-2018 estimated using \n inverse distance weighting - NYCCAS", tag = "B") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

ggno2
```

```{r, out.width = "100%"}
ggno <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2)) +
        geom_sf(aes(color = no_idw, fill = no_idw)) +
        scale_fill_viridis_c() + scale_color_viridis_c()  +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title =  "NO",  fill = "NO", color = "NO",
             caption = "Average concentrations 2010-2018 estimated using \n inverse distance weighting - NYCCAS", tag = "C") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))
ggno
```

```{r, out.width = "100%"}
gginc <- ggplot(shp.tract) +
        geom_sf(aes(color = tot_inc_percap, fill = tot_inc_percap)) +
        geom_sf(data = below110, color = "green", alpha = .01)  +
        labs(title = "Per capita income", fill = "", color = "",
             caption = "2018 ACS", tag = "D") +
        scale_fill_gradient2(high = "red", low = "blue", labels = comma, midpoint = 50000) + 
        scale_color_gradient2(high = "red", low = "blue", labels = comma, midpoint = 50000) +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

gginc
```

```{r, out.width = "100%"}
ggdr <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2)) +
        geom_sf(aes(color = caserate, fill = caserate)) +
        scale_fill_gradient2(high = "red", low = "blue", midpoint = .02, breaks = c(0, .025, .05, .075, .1)) + 
        scale_color_gradient2(high = "red", low = "blue", midpoint = .02, breaks = c(0, .025, .05, .075, .1)) +
        geom_sf(data = below110, color = "green", alpha = .01) +
        labs(title = "Case Rate", fill = "Case Rate", color = "Case Rate",
             caption = "Cases from DOHMH, Population from 2018 ACS", tag = "F") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

ggdr
```

```{r, out.width = "100%"}
ggdc <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2)) +
        geom_sf(aes(color = device_chg1020, fill = device_chg1020)) +
        scale_fill_gradient2(high = "blue", low = "red") + scale_color_gradient2(high = "blue", low = "red") +
        geom_sf(data = below110, color = "green", alpha = .01) +
        labs(title = "Mobile 'Home' device change", fill = "% change", color = "% change",
             caption = "Change in number of devices observed between \n week 10 and week 20 of 2020 - Safegraph", tag = "E") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))


print(ggdc)
```

```{r, out.width="100%"}
ggmaps <- grid.arrange(ggpm, ggno2, ggno, gginc, ggdc, ggdr, nrow = 2)
ggmaps
```


```{r, out.width = "100%"}
ggdeathr <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2 & tot_households >50)) +
        geom_sf(aes(color = deathrate, fill = deathrate)) +
        scale_fill_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, 
                              breaks = c(.0001, .0003, .0009, .0024, .0067)) + 
        scale_color_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, 
                               breaks = c(.0001, .0003, .0009, .0024, .0067))  +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title = "Death Rate",  caption = "Deaths from DOHMH \n Population from 2018 ACS", fill = "Death Rate", color = "Death Rate", tag = "A") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

ggdeathr
```

```{r, out.width = "100%"}
ggdeathsg <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2 & tot_households >50)) +
        geom_sf(aes(color = adj_death_rate, fill = adj_death_rate)) +
        scale_fill_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, breaks = c(.0001, .0003, .0009, .0024, .0067)) + 
        scale_color_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, breaks = c(.0001, .0003, .0009, .0024, .0067)) +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title = "Death rate adjusted for device-count change",  caption = "Deaths from DOHMH \n Population from 2018 ACS and safegraph",
             fill = "Adjusted\n Death Rate", color = "Adjusted\n Death Rate", tag = "B") +
        theme_light() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

ggdeathsg
```

```{r, out.width = "100%"}
gghospr <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2 & tot_households >50)) +
        geom_sf(aes(color = hosprate, fill = hosprate)) +
        scale_fill_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, breaks = c(.0003, .0025, .018)) + 
        scale_color_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, breaks = c(.0003, .0025, .018)) +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title = "Hospitalization Rate",  caption = "Hospitalizations from DOHMH \n Population from 2018 ACS", 
             fill = "Hospitalization\n Rate", color = "Hospitalization\n Rate", tag = "D") +
        theme_bw() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

gghospr
```



```{r, out.width = "100%"}
gghospsg <- ggplot(filter(shp.tract, tot_population >200 & caserate < .2 & tot_households >50)) +
        geom_sf(aes(color = adj_hosp_rate, fill = adj_hosp_rate)) +
        scale_fill_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, breaks = c(.0003, .0025, .018)) + 
        scale_color_continuous(type = "viridis", trans="log", limits = c(.0001,.01), oob = scales::squish, breaks = c(.0003, .0025, .018)) +
        geom_sf(data = below110, color = "red", alpha = .01) +
        labs(title = "Hospitalization Rates adjusted for \n device-count change",  
             caption = "Hospitalizations from DOHMH \n Population from 2018 ACS and safegraph", 
             fill = "Adjusted\n Hospitalization\n Rate", color = "Adjusted\n Hospitalization\n Rate", tag = "E") +
        theme_bw() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank(),
              plot.caption = element_text(size = 10))

gghospsg
```


```{r, out.width="100%"}
dscat <- ggplot(shp.tract) +
        geom_point(aes(x = deathrate, y = adj_death_rate, shape = as.factor(below_110th), color = as.factor(below_110th)), alpha = .6) +
        scale_shape_manual(values = c(1, 3), labels = c("Other", "Manhattan Below 110th St")) + 
        scale_color_manual(values = c("#FF6666", "#33FFFF"), labels = c("Other", "Manhattan Below 110th St")) + 
        xlim(0,.006) + ylim(0, .0075) + 
        geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
        labs(title = "Tract Death Rate Comparison", x = "Death Rate", y = "Adjusted Death Rate", tag = "C") + 
        guides(shape = guide_legend(reverse = TRUE), color = guide_legend(reverse = TRUE)) +
        theme_bw() +
        theme(legend.text = element_text(size=12), legend.title = element_blank(),
              legend.position = "bottom")

dscat
```

```{r, out.width="100%"}
hscat <- ggplot(shp.tract) +
        geom_point(aes(x = hosprate, y = adj_hosp_rate, shape = as.factor(below_110th), color = as.factor(below_110th)), alpha = .6) +
        scale_shape_manual(values = c(1, 3), labels = c("Other", "Manhattan Below 110th St")) + 
        scale_color_manual(values = c("#FF6666", "#33FFFF"), labels = c("Other", "Manhattan Below 110th St")) + 
        xlim(0,.015) + ylim(0, .02) +
        geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
        labs(title = "Tract Hospitalization Rate Comparison", x = "Hospitalization Rate", y = "Adjusted Hospitalization Rate", tag = "F") +
        guides(shape = guide_legend(reverse = TRUE), color = guide_legend(reverse = TRUE)) +
        theme_bw() +
        theme(legend.text = element_text(size=12), legend.title = element_blank(),
              legend.position = "bottom")

hscat
```


```{r, out.width="100%"}
sgadj <- grid.arrange(ggdeathr, ggdeathsg, dscat, gghospr, gghospsg, hscat, nrow = 2, widths = c(6,6,5))
```


```{r, out.width = "100%"}
ggdw1r <- ggplot(shp.tract) +
        geom_sf(aes(color = downwind_0_r, fill = downwind_0_r)) +
        geom_sf(data = puma_fes, alpha = .01) + 
        geom_sf(data = roads.sf, color = "red") +
        scale_fill_viridis_c(breaks = c(0, .25, .5, .75)) + scale_color_viridis_c(breaks = c(0, .25, .5, .75)) +
        labs(title = "Days Downwind from Highways within 0.5km", color = "% of Days", fill = "% of Days") +
        theme_bw() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank())

ggdw1r

shp.tract <- mutate(shp.tract, downwind_0_na = if_else(near_dist>500, as.numeric(NA), downwind_0_r))

ggdw1r <- ggplot(shp.tract) +
        geom_sf(aes(color = downwind_0_na, fill = downwind_0_r)) +
        geom_sf(data = puma_fes, alpha = .01) + 
        geom_sf(data = roads.sf, color = "red") +
        scale_fill_viridis_c(breaks = c(0, .25, .5, .75)) + scale_color_viridis_c(breaks = c(0, .25, .5, .75)) +
        labs(title = "Days Downwind from Highways within 0.5km", color = "% of Days", fill = "% of Days") +
        theme_bw() +
        theme(legend.text = element_text(size=12), legend.title = element_text(size = 12),
              axis.text.x = element_blank(), axis.text.y = element_blank())

ggdw1r
```

```{r}
centroids <- st_centroid(shp.tract)
roads.sf <- readRDS(paste(rootdir, "/Data/Traffic and Roadways/nycrds.RDS", sep = ""))
df.tracts <- read_csv(paste(rootdir, "/Data/Traffic and Roadways/tract_roads_distance_bearing.csv", sep = ""))
roads.sf <- st_transform(roads.sf, st_crs(centroids))
```

```{r}
### make grid to define road segments (.01 degree cell = approx 1km)
grid.sf <- st_make_grid(shp.tract, cellsize = .01)
roadsegs.sf <- st_split(roads.sf, grid.sf)
roadsegs.sf <- st_sf(geometry = st_collection_extract(roadsegs.sf, "LINESTRING"))
roadsegs.sf$index  <- c(1:nrow(roadsegs.sf))

t <- 1000700

tct1rds <- filter(roadsegs.sf, index %in% df.tracts$road_index[df.tracts$boro_ct201==t])
tct1closepts <- st_nearest_points(centroids[centroids$boro_ct201==t,], tct1rds)
tct1closepts.pt <- st_cast(tct1closepts, "POINT")
tct1closepts.pt <- tct1closepts.pt[seq(2, length(tct1closepts.pt), 2)]

tct1dists <- as.numeric(st_distance(centroids[centroids$boro_ct201==t,], tct1rds))
tct_dist_grps <- if_else(tct1dists < 500, "Within .5km", 
                         if_else(tct1dists < 1000, ".5-1km",
                                 if_else(tct1dists < 2000, "1-2km", 
                                         if_else(tct1dists < 3000, "2-3km", "3-5km"))))

sampletrct <- ggplot() +
        geom_sf(data = shp.tract[shp.tract$boro_ct201==t,]) +
        geom_sf(data = tct1rds[tct1dists < 1000,], color = "black") +
        geom_sf(data = tct1closepts[tct1dists < 1000], aes(color = tct_dist_grps[tct1dists < 1000]), alpha = .5) +
        labs(x = "", y = "", color = "Highway Distance") +
        theme_light() + 
        theme(legend.text = element_text(size=20), legend.title = element_text(size = 20),
                axis.text.x = element_blank(), axis.text.y = element_blank(),
                plot.caption = element_text(size = 16))

sampletrct
```

```{r, out.width = "100%"}
df.wind <- read_stata(paste(rootdir, "/Data/Weather/Global Hourly/all_2008_2018_hourly.dta", sep = ""))

df.wind <- filter(df.wind, name %in% c("NEWARK LIBERTY INTERNATIONAL AIRPORT, NJ US",
                                       "JFK INTERNATIONAL AIRPORT, NY US",
                                       "LAGUARDIA AIRPORT, NY US",
                                       "TETERBORO AIRPORT, NJ US") & hourly_wind_speed  > .44704  & is.na(hourly_wind_direction)==FALSE) %>%
        select(name, hourly_wind_speed, hourly_wind_direction, year, month, day) %>%
        rename(station = name)
```

```{r}
windhist <- ggplot(df.wind) +
        geom_histogram(aes(x = hourly_wind_direction, fill = as.factor(month)), bins = 36) +
        facet_wrap(~station, scales = "free") +
        theme_minimal() + 
        theme(strip.text = element_text(size=7)) +
        labs(title = "Histogram of Hourly Wind Directions at 4 NYC area airports", fill = "Month",
             x = "Wind direction (0/360 = North)", y = "Count of Hours")
windhist
```
